@
@ Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
@
@ Use of this source code is governed by a BSD-style license
@ that can be found in the LICENSE file in the root of the source
@ tree. An additional intellectual property rights grant can be found
@ in the file PATENTS.  All contributing project authors may
@ be found in the AUTHORS file in the root of the source tree.
@

@ This file contains the function WebRtcSpl_ComplexBitReverse(), optimized
@ for ARMv5 platforms.
@ Reference C code is in file complex_bit_reverse.c. Bit-exact.

#include "webrtc/system_wrappers/interface/asm_defines.h"

@ WebRtcSpl_ComplexBitReverse
@
@ Permutes an array of complex values in place into bit-reversed order. Each
@ complex value is moved as a single 32-bit word (two adjacent 16-bit halves).
@
@ In:    r0 = complex_data, base address of the array.
@        r1 = stages; the array holds (1 << stages) complex values.
@ Out:   nothing in registers; the array is rewritten in place.
@ Uses:  r1-r3, r12 and the flags as scratch. r4-r7 are saved and restored.
@
@ stages == 7 and stages == 8 walk a precomputed table of offset pairs
@ (index_7 / index_8). Every other value derives the bit-reversed partner
@ of each index on the fly.
@
@ NOTE(review): the instruction count is kept stable on purpose; the adr
@ instructions encode the distance to the tables as an immediate, so growing
@ or shrinking the code should be followed by a rebuild check.
GLOBAL_FUNCTION WebRtcSpl_ComplexBitReverse
.align  2
DEFINE_FUNCTION WebRtcSpl_ComplexBitReverse
  stmfd sp!, {r4-r7}              @ Same as push {r4-r7}.

  @ Table dispatch. adr and mov leave the flags untouched, so the table
  @ pointer and entry count are staged between each compare and its branch.
  cmp r1, #7
  adr r3, index_7                 @ r3 = table for stages == 7.
  mov r4, #112                    @ r4 = number of 16-bit table entries.
  beq BITREV_TABLE_SETUP

  cmp r1, #8
  adr r3, index_8                 @ r3 = table for stages == 8.
  mov r4, #240                    @ r4 = number of 16-bit table entries.
  beq BITREV_TABLE_SETUP

  @ Generic path. Register roles from here on:
  @   r1 = n  = 1 << stages         r6 = nn = n - 1
  @   r3 = m  (runs from 1 to nn)   r4 = mr (bit-reversed partner of m)
  @   r5 = complex_data + 4 * (m - 1), so element m sits at [r5, #4]
  mov r3, #1                      @ m = 1;
  mov r1, r3, lsl r1              @ n = 1 << stages;
  subs r6, r1, #1                 @ nn = n - 1;
  ble BITREV_DONE                 @ Nothing to reorder when nn <= 0.

  mov r4, #0                      @ mr = 0;
  mov r5, r0                      @ Trails element m by one word.

BITREV_NEXT_M:
  sub r12, r6, r4                 @ r12 = nn - mr
  mov r2, r1                      @ l = n;

BITREV_HALVE_L:
  mov r2, r2, asr #1              @ do { l >>= 1; }
  cmp r2, r12
  bgt BITREV_HALVE_L              @ while (l > nn - mr);

  sub r12, r2, #1                 @ r12 = l - 1
  and r4, r4, r12
  add r4, r4, r2                  @ mr = (mr & (l - 1)) + l;
  cmp r4, r3
  ble BITREV_ADVANCE              @ Swap only when mr > m.

  mov r12, r4, lsl #2             @ Byte offset of element mr.
  ldr r7, [r5, #4]                @ r7 = element m (real and imaginary).
  ldr r2, [r0, r12]               @ r2 = element mr (real and imaginary).
  str r7, [r0, r12]
  str r2, [r5, #4]

BITREV_ADVANCE:
  add r5, r5, #4                  @ Keep r5 one word behind element m.
  add r3, r3, #1                  @ ++m;
  cmp r3, r1
  bne BITREV_NEXT_M               @ Continue until m == n.

  b BITREV_DONE

BITREV_TABLE_SETUP:
  add r4, r3, r4, lsl #1          @ r4 = address just past the last entry.

BITREV_TABLE_LOOP:
  ldrsh r2, [r3], #2              @ Byte offset of the first element of a pair.
  ldrsh r5, [r3], #2              @ Byte offset of the second element.
  ldr r1, [r0, r2]                @ First element (real and imaginary).
  ldr r12, [r0, r5]               @ Second element (real and imaginary).
  cmp r3, r4                      @ End test; the stores keep the flags.
  str r1, [r0, r5]
  str r12, [r0, r2]
  bne BITREV_TABLE_LOOP

BITREV_DONE:
  ldmfd sp!, {r4-r7}              @ Same as pop {r4-r7}.
  bx lr

@ Swap tables for the stages == 7 and stages == 8 paths of
@ WebRtcSpl_ComplexBitReverse.
@
@ Each table is a flat list of 16-bit values that the function reads two at a
@ time; every pair names two 32-bit complex elements that exchange places.
@ The values are byte offsets from the start of complex_data: twice the index
@ of the 16-bit real part, or four times the complex-element index. This is
@ different from the generic C code, which keeps plain indexes.
@
@ The function reaches both tables with adr and uses the entry counts below
@ (112 and 240) as its loop bounds, so a change in table length needs a
@ matching change in the function.

.align  2
index_7:  @ Byte offsets for stages == 7: 112 entries, 56 swap pairs.
  .short 4, 256, 8, 128, 12, 384, 16, 64, 20, 320, 24, 192, 28, 448, 36, 288
  .short 40, 160, 44, 416, 48, 96, 52, 352, 56, 224, 60, 480, 68, 272, 72, 144
  .short 76, 400, 84, 336, 88, 208, 92, 464, 100, 304, 104, 176, 108, 432, 116
  .short 368, 120, 240, 124, 496, 132, 264, 140, 392, 148, 328, 152, 200, 156
  .short 456, 164, 296, 172, 424, 180, 360, 184, 232, 188, 488, 196, 280, 204
  .short 408, 212, 344, 220, 472, 228, 312, 236, 440, 244, 376, 252, 504, 268
  .short 388, 276, 324, 284, 452, 300, 420, 308, 356, 316, 484, 332, 404, 348
  .short 468, 364, 436, 380, 500, 412, 460, 444, 492

index_8:  @ Byte offsets for stages == 8: 240 entries, 120 swap pairs.
  .short 4, 512, 8, 256, 12, 768, 16, 128, 20, 640, 24, 384, 28, 896, 32, 64
  .short 36, 576, 40, 320, 44, 832, 48, 192, 52, 704, 56, 448, 60, 960, 68, 544
  .short 72, 288, 76, 800, 80, 160, 84, 672, 88, 416, 92, 928, 100, 608, 104
  .short 352, 108, 864, 112, 224, 116, 736, 120, 480, 124, 992, 132, 528, 136
  .short 272, 140, 784, 148, 656, 152, 400, 156, 912, 164, 592, 168, 336, 172
  .short 848, 176, 208, 180, 720, 184, 464, 188, 976, 196, 560, 200, 304, 204
  .short 816, 212, 688, 216, 432, 220, 944, 228, 624, 232, 368, 236, 880, 244
  .short 752, 248, 496, 252, 1008, 260, 520, 268, 776, 276, 648, 280, 392, 284
  .short 904, 292, 584, 296, 328, 300, 840, 308, 712, 312, 456, 316, 968, 324
  .short 552, 332, 808, 340, 680, 344, 424, 348, 936, 356, 616, 364, 872, 372
  .short 744, 376, 488, 380, 1000, 388, 536, 396, 792, 404, 664, 412, 920, 420
  .short 600, 428, 856, 436, 728, 440, 472, 444, 984, 452, 568, 460, 824, 468
  .short 696, 476, 952, 484, 632, 492, 888, 500, 760, 508, 1016, 524, 772, 532
  .short 644, 540, 900, 548, 580, 556, 836, 564, 708, 572, 964, 588, 804, 596
  .short 676, 604, 932, 620, 868, 628, 740, 636, 996, 652, 788, 668, 916, 684
  .short 852, 692, 724, 700, 980, 716, 820, 732, 948, 748, 884, 764, 1012, 796
  .short 908, 812, 844, 828, 972, 860, 940, 892, 1004, 956, 988
